Introduction

#windowsFonts(font = windowsFont('Helvetica'))

crs = 4326

library(vroom)
library(sf)
library(terra)
library(dplyr)
library(spData)
library(mapview)
library(geosphere)
library(sp)
library(rgeos)
library(ggplot2)
library(ggmap)
library(kableExtra)
library(tidyverse)
library(data.table)
#remotes::install_github("CityOfPhiladelphia/rphl")
library(rphl)
library(lubridate)
library(furrr)
library(tidycensus)
library(rgdal)
library(furrr)
library(mapview)
library(NbClust)
library(cluster)
library(psych)
ll <- function(dat, proj4 = 4326){st_transform(dat, proj4)}

census_api_key("b33ec1cb4da108659efd12b3c15412988646cbd8", overwrite = TRUE)

root.dir = "https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/DATA/"
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")

plotTheme <- function(base_size = 9, title_size = 10){
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = title_size, colour = "black", hjust = 0.5), 
    plot.subtitle = element_text( face = 'italic',
                                 size = base_size, colour = "black", hjust = 0.5),
    plot.caption = element_text( hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_line("grey80", size = 0.01),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill=NA, size=.5),
    strip.background = element_blank(),
    strip.text = element_text( size=9),
    axis.title = element_text( size=9),
    axis.text = element_text( size=7),
    axis.text.y = element_text( size=7),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.title = element_text( colour = "black", face = "italic", size = 9),
    legend.text = element_text( colour = "black", face = "italic", size = 7),
    strip.text.x = element_text( size = 9),
    legend.key.size = unit(.5, 'line')
  )
}

mapTheme <- function(base_size = 9, title_size = 10){
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = title_size, colour = "black", hjust = 0.5), 
    plot.subtitle = element_text( face = 'italic',
                                 size = base_size, colour = "black", hjust = 0.5),
    plot.caption = element_text( hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(size=base_size),
    axis.title = element_text( size=9),
    axis.text = element_blank(),
    axis.text.y = element_blank(),
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.title = element_text( colour = "black", face = "italic", size = 9),
    legend.text = element_text( colour = "black", face = "italic", size = 7),
    strip.text.x = element_text(size=base_size),
    legend.key.size = unit(.5, 'line')
  )
}

palette9 <- c('#315d7f', '#4f5d7f', '#6d5c7e', '#a36681', '#d96f83', '#f2727f', '#f6928a', '#f8a28f', '#f9b294')
palette7 <- c('#315d7f', '#4f5d7f', '#6d5c7e', '#d96f83', '#f2727f', '#f6928a', '#f9b294')
palette5 <- c("#f9b294","#f2727f","#c06c86","#6d5c7e","#315d7f")
palette4 <- c("#f9b294","#f2727f","#c06c86","#6d5c7e")
palette2 <- c("#f9b294","#f2727f")
palette1_main <- "#F2727F"
palette1_assist <- '#F9B294'

xxxxXXXXX — from Jeff

District & Service Area

pprDistrict <- st_read('https://opendata.arcgis.com/datasets/0cdc4a1e86c6463b9600f9d9fca39875_0.geojson') %>%
  st_transform(crs)

pprServiceArea <- read_sf(dsn="data/FromPPR/PPR_Service_Areas_2021/PPR_Service_Areas_2021.shp")%>%
  st_transform(crs = crs)

base_map <- get_map(location = unname(st_bbox(ll(st_buffer(st_union(pprDistrict),11000)))),maptype = "terrian")

ggmap(base_map) + 
  geom_sf(data=ll(st_union(pprDistrict)),color="black",size=2,fill = "transparent",inherit.aes = FALSE)+
  geom_sf(data=ll(pprDistrict),color='black',size=2,fill = "transparent",inherit.aes = FALSE)+
  geom_sf(data=ll(pprServiceArea),color='black',size=1,fill = "transparent",inherit.aes = FALSE)+
  geom_sf(data=ll(pprDistrict %>% filter(DISTRICTID %in% c(7,8,9))),color=palette1_main,size=2,fill = "transparent",inherit.aes = FALSE)+
  geom_sf(data=ll(pprServiceArea %>% filter(PPR_DIST %in% c(7,8,9))),color=palette1_main,size=1,fill = "transparent",inherit.aes = FALSE)+
  labs(title = "", 
       subtitle = "",
       x="",y="")+
  mapTheme()
Figure. Locations of PPR Districts and Service Areas

Figure. Locations of PPR Districts and Service Areas

The whole Philadelphia is divided into 10 PPR districts, each district unit is divided into several service area units for the sake of administration. the project mainly focus on the District 7、8、9, The pink lines in the map are the services areas in District 7,8,9. Note: The 9 district does not include the Smith Area.

Properties

pprProperties <- st_read('https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson')%>%
  st_transform(crs)

property <- st_join(st_centroid(pprProperties),pprServiceArea,left=TRUE) %>% 
  st_drop_geometry() %>% 
  left_join(pprProperties %>% dplyr::select(OBJECTID,geometry),by='OBJECTID') %>% 
  st_sf() %>% 
  st_transform(crs = crs) %>% 
  dplyr::select(-Shape__Length,-Shape__Area,-Shape_Leng,-Shape_Area) %>% 
  rename('ServiceAreaID' = 'INFO')

ggplot() + 
  geom_sf(data=property,color=palette1_main,fill = palette1_main)+
  geom_sf(data=st_union(pprDistrict),color="black",size=1,fill = "transparent")+
  geom_sf(data=pprDistrict,color="black",size=0.7,linetype ="dashed",fill = "transparent")+
  geom_sf(data=pprDistrict %>% filter(DISTRICTID %in% c(7,8,9)),color="black",size=1,fill = "transparent")+
  labs(title = "", 
       subtitle = "",
       x="",y="")+
  mapTheme()
Figure. Locations of PPR Properties

Figure. Locations of PPR Properties

The activities hold in PPR properties are recorded in two systems: the program schedules and the permit. With the former, the PPR staffing arrange many activities seasonally in different PPR facilities. With the latter, people outside the PPR apply for conducting activities and reserve space in Parks & Recreation areas. In 2021, there are 4376 times of activities recorded in program schedules, covering 7 categories of After School, Athletic, Camp, Cultural, Educational, Environmental/Outdoor, and other activities, while 3861 times of activities are recorded in the permit system.

Program & Permits Analysis

Overall Program & Permits Distribution

permit2021 <- vroom("data/FromPPR/PPR-recreation-permits-2021.csv")
program2021 <- vroom("data/FromPPR/PPR-programs-attended-2021-with-schedules.csv")
facilityID <- read.csv("data/FromPPR/tblFacility_to_PPR_Properties.csv")

# define date, filter by attendance date
program2021.clean <- program2021 %>% 
  mutate(AttendanceWeekDate = mdy(AttendanceWeekDate),
         DateFrom = mdy(DateFrom),
         DateTo = mdy(DateTo)) %>% 
  filter(AttendanceWeekDate > DateFrom & AttendanceWeekDate < DateTo)

# original data is recorded by week, here we change it into being recorded by occurence
program2021.clean <- separate(program2021.clean, Days,into= c("1","2","3","4","5","6","7"))

program2021.clean <- program2021.clean %>% 
  gather(colNames, weekday, 15:21) %>% 
  select(-colNames) %>% 
  na.omit(cols='weekday')%>% 
  mutate(AttendenceRealDate = case_when(
    weekday == "Monday" ~ AttendanceWeekDate,
    weekday == "Tuesday" ~ AttendanceWeekDate+1,
    weekday == "Wednesday" ~ AttendanceWeekDate+2,
    weekday == "Thursday" ~ AttendanceWeekDate+3,
    weekday == "Friday" ~ AttendanceWeekDate+4,
    weekday == "Saturday" ~ AttendanceWeekDate+5,
    weekday == "Sunday" ~ AttendanceWeekDate+6,
  ))

# join property,permit and program data
program2021.join <- left_join(program2021.clean, facilityID, by =c("FacilityID" = "FacilityID")) %>% 
  left_join(., property, by =c("PPR_Properties_ObjectID"="OBJECTID"))

permit2021.join <- left_join(permit2021, facilityID, by =c("FacilityID")) %>% 
  left_join(., property, by =c("PPR_Properties_ObjectID"="OBJECTID"))

# filter the failed joining items
program2021.otherDIstrict <- program2021.join %>% filter(is.na(PPR_Properties_ObjectID))

program2021.join <- program2021.join %>% drop_na(PPR_Properties_ObjectID)

permit2021.otherDIstrict <- permit2021.join %>% filter(is.na(PPR_Properties_ObjectID))

permit2021.join <- permit2021.join %>% drop_na(PPR_Properties_ObjectID)

# Wrangle "program2021.join", and extract month attendance
Facility_Program <- program2021.join %>%
  select(Facility,ActvityTypeCategory,ActivityType,
         AttendanceWeekDate,
         RegisteredIndividualsCount,
         PPR_DISTRICT, PUBLIC_NAME, PARENT_NAME,geometry) %>%
  mutate(month = case_when(month(AttendanceWeekDate)==1 ~ "Jan",
                           month(AttendanceWeekDate)==2 ~ "Feb",
                           month(AttendanceWeekDate)==3 ~ "Mar",
                           month(AttendanceWeekDate)==4 ~ "Apr",
                           month(AttendanceWeekDate)==5 ~ "May",
                           month(AttendanceWeekDate)==6 ~ "Jun",
                           month(AttendanceWeekDate)==7 ~ "Jul",
                           month(AttendanceWeekDate)==8 ~ "Aug",
                           month(AttendanceWeekDate)==9 ~ "Sep",
                           month(AttendanceWeekDate)==10 ~ "Oct",
                           month(AttendanceWeekDate)==11 ~ "Nov",
                           month(AttendanceWeekDate)==12 ~ "Dec")) %>% 
  distinct(.keep_all = FALSE)

Facility_Program_otherDistricts <- program2021.otherDIstrict %>%
  select(Facility,ActvityTypeCategory,ActivityType,
         AttendanceWeekDate,
         RegisteredIndividualsCount,
         PUBLIC_NAME, PARENT_NAME,geometry) %>%
  mutate(month = case_when(month(AttendanceWeekDate)==1 ~ "Jan",
                           month(AttendanceWeekDate)==2 ~ "Feb",
                           month(AttendanceWeekDate)==3 ~ "Mar",
                           month(AttendanceWeekDate)==4 ~ "Apr",
                           month(AttendanceWeekDate)==5 ~ "May",
                           month(AttendanceWeekDate)==6 ~ "Jun",
                           month(AttendanceWeekDate)==7 ~ "Jul",
                           month(AttendanceWeekDate)==8 ~ "Aug",
                           month(AttendanceWeekDate)==9 ~ "Sep",
                           month(AttendanceWeekDate)==10 ~ "Oct",
                           month(AttendanceWeekDate)==11 ~ "Nov",
                           month(AttendanceWeekDate)==12 ~ "Dec")) %>% 
  distinct(.keep_all = FALSE)

# Aggregate weekly visites
WeekVisit <- aggregate(
  RegisteredIndividualsCount ~ AttendanceWeekDate + ActvityTypeCategory + PPR_DISTRICT, data = Facility_Program, FUN = sum)

In the figure below, red legends show the locations of facilities with program schedules while orange legends show the distributions of facilities with permit records. One limitation should be pointed out here is that some facilities didn’t spatial joined the geographic information successfully, so, there are more facilities with program and permit compared to the number showing on the figure.

ggplot()+
  geom_sf(data=pprServiceArea %>% filter(PPR_DIST %in% c(7,8,9)),color='black',size=0.25,linetype ="dashed", fill= "transparent")+
  geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==7)),color="black",size=1,fill = "transparent")+
  geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==8)),color="black",size=1,fill = "transparent")+
  geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==9)),color="black",size=1,fill = "transparent")+
  geom_sf(data=permit2021.join,aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
  geom_sf(data=program2021.join,aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
  labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 7,8 & 9")+
  mapTheme()
Figure. Locations of Programs and Permits

Figure. Locations of Programs and Permits

The above dataset gives information about PPR programs & permits in 2021, including information like the duration of the program, the attendance of the program etc. But some wrangling are needed for further uses. And the wrangling is taken in the next section. Through above wrangling, we link program data to their based properties,their belonged Service Area.

District 7,8,9

There are three facilities with program scheduled in District 7: Athletic Recreation Center, Mander Playground, and Marian Anderson Recreation Center. Marian Anderson Recreation Center hold more activities in athletic category, like soccer and baseball. The other two facilities hold more cultural activities, like art and music. Athletic activities were hold from March to November while cultural and after school activities mostly took place in fall.

District 7
ggplot()+
  geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==7)),color="black",size=1,fill = "transparent")+
  geom_sf(data=pprServiceArea %>% filter(PPR_DIST ==7),color="black",linetype ="dashed",size=1,fill = "transparent")+
  geom_sf(data=permit2021.join%>% filter(PPR_DIST ==7),aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
  geom_sf(data=program2021.join%>% filter(PPR_DIST ==7),aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
  labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 7")+
  mapTheme()
Figure. Locations of Programs and Permits

Figure. Locations of Programs and Permits

ggplot(Facility_Program %>%filter(PPR_DISTRICT == 7)) +
  geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
  scale_fill_manual(values = palette5)+
  labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 7")+
  plotTheme()
Figure. Categories of Events in District7

Figure. Categories of Events in District7

ggplot(Facility_Program %>%filter(PPR_DISTRICT == 7)) + 
  geom_bar(aes(x= Facility, fill = ActivityType),position="stack")+
  scale_fill_manual(values = palette7)+
  labs(y = "Program Frequency", fill="Program sub-Category", title = "Facility & Sub-categories in District 7")+ 
  plotTheme()
Figure. Categories of Events in District7

Figure. Categories of Events in District7

ggplot(WeekVisit %>%filter(PPR_DISTRICT == 7)) +
  geom_line(size=0.5,aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory)) +
  geom_point(aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory, size = RegisteredIndividualsCount)) +
  scale_color_manual(values = palette5)+
  scale_size_continuous(range = c(2, 4))+
  labs(title = "Visitor Counts by Week and Activity Categories in District 7",
       color = "Program Category",
       size="Visitor Counts",
       x = "Week of the Year",
       y = "Visitor Counts")+
  plotTheme()
Figure. Categories of Events in District7

Figure. Categories of Events in District7

There are five facilities with program scheduled in District 8: 48th & Woodland Playground, Christy Recreation Center, Howards S. Morris Recreation Center, Laura Sims Skate House, and Shepard Recreation Center. Laura Sims Skate House hold hockey and ice skating activities from October to May the next year, while Morris Recreation Center hosted more cultural activities like dance as well as athletic activities of gymnastics, tumbling and basketball from October to December.

District 8
ggplot()+
  geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==8)),color="black",size=1,fill = "transparent")+
  geom_sf(data=pprServiceArea %>% filter(PPR_DIST ==8),color="black",linetype ="dashed",size=1,fill = "transparent")+
  geom_sf(data=permit2021.join%>% filter(PPR_DIST ==8),aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
  geom_sf(data=program2021.join%>% filter(PPR_DIST ==8),aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
  labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 8")+
  mapTheme()
Figure. Locations of Programs and Permits

Figure. Locations of Programs and Permits

ggplot(Facility_Program %>%filter(PPR_DISTRICT == 8)) +
  geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
  scale_fill_manual(values = palette5)+
  labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 8")+
  plotTheme()
Figure. Categories of Events in District8

Figure. Categories of Events in District8

ggplot(Facility_Program %>%filter(PPR_DISTRICT == 8)) + 
  geom_bar(aes(x= Facility, fill = ActivityType),position="stack")+
  scale_fill_manual(values = palette9)+
  labs(y = "Program Frequency", fill="Program sub-Category", title = "Facility & Sub-categories in District 8")+ 
  plotTheme()
Figure. Categories of Events in District8

Figure. Categories of Events in District8

ggplot(WeekVisit %>%filter(PPR_DISTRICT == 8)) +
  geom_point(aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory, size = RegisteredIndividualsCount)) +
  geom_line(size=0.5,aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory)) +
  scale_color_manual(values = palette5)+
  scale_size_continuous(range = c(2, 4))+
  labs(title = "Visitor Counts by Week and Activity Categories in District 8",
       color = "Program Category",
       size="Visitor Counts",
       x = "Week of the Year",
       y = "Visitor Counts")+
  plotTheme()
Figure. Categories of Events in District8

Figure. Categories of Events in District8

There are seven facilities with program scheduled in District 9: Barry Playground and Pool, Cibotti Recreation Center, DiSilvestro Playground, East Passyunk Community Center, Eastwick Regional Playground, and Thomas B. Smith Recreation Center. Athletic activities of basketball and aquatics mostly took place in Barry Playground and Pool and East Passyunk Community Center. Eastwick Regional Playground, and Thomas B. Smith Recreation Center are the most two popular facilities with activities of athletic, after school, cultural, educational, and other categories. Athletic activities took place throughout the whole year, while other category of activities, like older adults and mentoring, were hold in the 2nd half of the year. Cultural activities, like art, dance, music, usually suspended in summer.

District 9
ggplot()+
  geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==9)),color="black",size=1,fill = "transparent")+
  geom_sf(data=pprServiceArea %>% filter(PPR_DIST ==9),color="black",linetype ="dashed",size=1,fill = "transparent")+
  geom_sf(data=permit2021.join%>% filter(PPR_DIST ==9),aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
  geom_sf(data=program2021.join%>% filter(PPR_DIST ==9),aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
  labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 9")+
  mapTheme()
Figure. Locations of Programs and Permits

Figure. Locations of Programs and Permits

ggplot(Facility_Program %>%filter(PPR_DISTRICT == 9)) +
  geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
  scale_fill_manual(values = palette7[2:7])+
  labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 9")+
  plotTheme()
Figure. Categories of Events in District8

Figure. Categories of Events in District8

ggplot(WeekVisit %>%filter(PPR_DISTRICT == 9)) +
  geom_point(aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory, size = RegisteredIndividualsCount)) +
  geom_line(size=0.5,aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory)) +
  scale_color_manual(values = palette7[2:7])+
  scale_size_continuous(range = c(2, 4))+
  labs(title = "Visitor Counts by Week and Activity Categories in District 9",
       color = "Program Category",
       size="Visitor Counts",
       x = "Week of the Year",
       y = "Visitor Counts")+
  plotTheme()
Figure. Categories of Events in District8

Figure. Categories of Events in District8

There are 27 facilities with program scheduled in other districts of PPR serving areas. We will have further analysis after PPR provide specific sorting information.

Other Districts

ggplot(Facility_Program_otherDistricts) +
  geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
  scale_fill_manual(values = palette7)+
  labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 9")+
  plotTheme()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Figure. Categories of Events in other Districts

Figure. Categories of Events in other Districts

SafeGraph Data Analysis

# brand_info <- vroom("data/safegraph/Philadelphia-Camden-WilmingtonPA-NJ-DE-MDMSA-CORE_POI-2021_11-2021-12-17/brand_info.csv")
# core_poi <- vroom("data/safegraph/Philadelphia-Camden-WilmingtonPA-NJ-DE-MDMSA-CORE_POI-2021_11-2021-12-17/core_poi.csv")
# 
# monthList = c("01","02","03","04","05","06","07","08","09","10","11")
# 
# patternAllMonth = data.frame()
# for (i in monthList){
#   currentMonth = vroom(paste("data/safegraph/SafeGraph Data Purchase Dec-16-2021/Philadelphia-Camden-WilmingtonPA-NJ-DE-MDMSA-PATTERNS-2021_",
#        i,
#        "-2021-12-17/patterns.csv.gz",sep = ""))%>%
#     filter(region=="PA")%>%
#     mutate(month=paste(i,sep = ""))
#   patternAllMonth = rbind(patternAllMonth,currentMonth)
# }
# 
# # filter into philly, join with POI data
# safeGraph <- patternAllMonth %>%
#   filter(city == "Philadelphia")%>%
#   left_join(core_poi %>% dplyr::select(placekey,location_name,top_category,sub_category,naics_code,latitude,longitude),
#             by=c("placekey"="placekey","location_name" = "location_name"),keep=FALSE)
# 
# # create geometry from lat & lng
# safeGraph.geo <-
#   safeGraph %>%
#   st_as_sf(coords = c("longitude", "latitude"), crs = crs, agr = "constant", na.fail=FALSE)


# patternAllMonth <- st_read("data/output/patternAllMonth.csv")
#st_write(safeGraph.geo,"data/output/safeGraph.geo.GeoJSON")
safeGraph.geo <- st_read("data/output/safeGraph.geo.GeoJSON",crs = crs)
## Reading layer `safeGraph.geo' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\safeGraph.geo.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 198218 features and 31 fields (with 11409 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.55742 ymin: 39.85988 xmax: -74.93288 ymax: 40.1344
## Geodetic CRS:  WGS 84
# change workers number by yourself
plan(multiprocess, workers = 10)

# keep congeneric bussiness
congenericMoves <-
  safeGraph.geo %>%
  filter(top_category %in% c("Promoters of Performing Arts, Sports, and Similar Events","Other Amusement and Recreation Industries","Museums, Historical Sites, and Similar Institutions") | str_detect(location_name, "Park") | str_detect(location_name, "Playground") | str_detect(location_name, "Recreation Center")) %>%
  filter(str_detect(location_name, "Parking", negate = TRUE))

# Keep only ppr sites
parks <- safeGraph.geo %>% 
  dplyr::select(placekey, naics_code, location_name) %>% 
  distinct() %>% 
  filter(naics_code %in% c(712190, 713990, 713940, 711310) | str_detect(location_name, "Park") | str_detect(location_name, "Playground") | str_detect(location_name, "Recreation Center")) %>%
  filter(str_detect(location_name, "Parking", negate = TRUE)) %>% 
  st_transform(crs = 4326)

PPRmoves <- safeGraph.geo %>% 
  filter(placekey %in% as.list(parks$placekey))

712190:Nature Parks and Other Similar Institutions; 713990:All Other Amusement and Recreation Industries; 713940: Fitness and Recreational Sports Centers; 711310:Promoters of Performing Arts, Sports, and Similar Events

# join filtered safeGraph place with ppr property
propertyWithPlaceKey <- st_join(property %>% st_buffer(10),parks %>% dplyr::select(placekey, geometry),left=FALSE) %>%
  st_drop_geometry() %>% 
  left_join(property %>% dplyr::select(OBJECTID),by=('OBJECTID'='OBJECTID')) %>% 
  st_sf() %>% 
  st_transform(crs=crs) %>% 
  drop_na(placekey)

# join filtered safeGraph place with ppr programs
program2021.joinWithPlaceKey <- 
  st_join(program2021.join %>%
            st_sf() %>% 
            st_transform(crs=crs) %>%
            st_buffer(10),
          parks %>% dplyr::select(placekey, geometry),left=FALSE) %>%
  st_drop_geometry() %>%  
  merge.data.frame(program2021.join %>%
                     dplyr::select(geometry),
                   by='row.names')%>%
  dplyr::select(-Row.names) %>% 
  st_sf() %>% 
  st_transform(crs=crs)

mapview(property)+mapview(st_centroid(propertyWithPlaceKey),col.regions = "green")

In the above map, the polygon is the properties of PPR, the green dots are the successfully join properties with placekey

# unnest visit Count data
# visitCount <-
#   PPRmoves %>%
#   select(placekey, date_range_start, date_range_end, visits_by_day) %>%
#   mutate(date_range_start = as_date(date_range_start),
#          date_range_end = as_date(date_range_end)) %>%
#   dplyr::select(-date_range_end) %>%
#   mutate(visits_by_day = future_map(visits_by_day, function(x){
#     unlist(x) %>%
#       as_tibble() %>%
#       rowid_to_column(var = "day") %>%
#       mutate(visits = value) %>%
#       dplyr::select(-value)
#   })) %>%
#   unnest(cols = c("visits_by_day"))
# visitCount <- visitCount %>%
#     rename(visitDay = date_range_start) %>% 
#     mutate(visitDay = day+visitDay-1) %>% 
#     mutate(month = month(visitDay))
# st_write(visitCount,"data/output/visitCount.GeoJSON")
visitCount <- st_read("data/output/visitCount.GeoJSON",crs=crs)
## Reading layer `visitCount' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitCount.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 341467 features and 5 fields (with 8647 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84
# unnest popularity_by_hour data
# visitHour <-
#   PPRmoves %>%
#   select(placekey, popularity_by_hour, date_range_start) %>%
#   mutate(date_range_start = as_date(date_range_start),
#          month =  month(date_range_start)) %>%
#   dplyr::select(-date_range_start) %>%
#   mutate(popularity_by_hour = future_map(popularity_by_hour, function(x){
#     unlist(x) %>%
#       as_tibble() %>%
#       rowid_to_column(var = "hour") %>%
#       rename(visit = value)
#   }))%>%
#   unnest(popularity_by_hour)
# 
# st_write(visitHour,"data/output/visitHour.GeoJSON")
visitHour <- st_read("data/output/visitHour.GeoJSON",crs=crs)
## Reading layer `visitHour' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitHour.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 269880 features and 4 fields (with 6840 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84
# unnest the origin-destination data

# originCount <-
#   PPRmoves %>%
#   select(placekey, visitor_home_cbgs, date_range_start) %>%
#   mutate(date_range_start = as_date(date_range_start),
#          month =  month(date_range_start)) %>%
#   dplyr::select(-date_range_start) %>%
#   mutate(visitor_home_cbgs = future_map(visitor_home_cbgs, function(x){
#     jsonlite::fromJSON(x) %>%
#       as_tibble() %>% 
#       dplyr::select(starts_with("4")) %>%
#       gather()
#   })) %>%
#   unnest(visitor_home_cbgs) %>% 
#   rename(origin =key ,visitors= value)
# 
# st_write(originCount,"data/output/originCount.GeoJSON")
originCount <- st_read("data/output/originCount.GeoJSON",crs=crs)
## Reading layer `originCount' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\originCount.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 348519 features and 4 fields (with 8681 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84
#unnest the departure - destination data
# departCount <-
#   PPRmoves %>%
#   select(placekey, visitor_daytime_cbgs, date_range_start) %>%
#   mutate(date_range_start = as_date(date_range_start),
#          month =  month(date_range_start)) %>%
#   dplyr::select(-date_range_start) %>%
#   mutate(visitor_daytime_cbgs = future_map(visitor_daytime_cbgs, function(x){
#     jsonlite::fromJSON(x) %>%
#       as_tibble() %>%
#       dplyr::select(starts_with("4")) %>%
#       gather()
#   }))%>%
#   unnest(visitor_daytime_cbgs) %>%
#   rename(departure =key ,visitors= value)
# 
# st_write(departCount,"data/output/departCount.GeoJSON")
departCount <- st_read("data/output/departCount.GeoJSON",crs=crs)
## Reading layer `departCount' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\departCount.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 299627 features and 4 fields (with 7380 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84
# unnest the bucketed_dwell_times data
# dwellTime <-
#   PPRmoves %>%
#   select(placekey, bucketed_dwell_times, date_range_start) %>%
#   mutate(date_range_start = as_date(date_range_start),
#          month =  month(date_range_start)) %>%
#   dplyr::select(-date_range_start) %>%
#   mutate(bucketed_dwell_times = future_map(bucketed_dwell_times, function(x){
#     jsonlite::fromJSON(x) %>%
#       as_tibble() %>%
#       gather()
#   }))%>%
#   unnest(bucketed_dwell_times) %>%
#   rename(dwellTimes =key ,visitors= value)
# 
# st_write(dwellTime,"data/output/dwellTime.GeoJSON")
dwellTime <- st_read("data/output/dwellTime.GeoJSON",crs=crs)
## Reading layer `dwellTime' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\dwellTime.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 78715 features and 4 fields (with 1995 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84
# unnest the related_same_day_brand
# relatedBrand <-
#   PPRmoves %>%
#   select(placekey, related_same_day_brand, date_range_start) %>%
#   mutate(date_range_start = as_date(date_range_start),
#          month =  month(date_range_start)) %>%
#   dplyr::select(-date_range_start) %>%
#   mutate(related_same_day_brand = future_map(related_same_day_brand, function(x){
#     jsonlite::fromJSON(x) %>%
#       as_tibble() %>%
#       gather()
#   }))
# 
# relatedBrand <- relatedBrand %>%
#   unnest(related_same_day_brand) %>%
#   rename(relatedBrand =key ,visitors= value)
# 
# st_write(relatedBrand,"data/output/relatedBrand.GeoJSON")
relatedBrand <- st_read("data/output/relatedBrand.GeoJSON",crs=crs)
## Reading layer `relatedBrand' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\relatedBrand.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 209840 features and 4 fields (with 5124 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84
# unnest the popularity_by_day data
# visitWeekday <-
#   PPRmoves %>%
#   select(placekey, popularity_by_day, date_range_start) %>%
#   mutate(date_range_start = as_date(date_range_start),
#          month =  month(date_range_start)) %>%
#   dplyr::select(-date_range_start) %>%
#   mutate(popularity_by_day = future_map(popularity_by_day, function(x){
#     jsonlite::fromJSON(x) %>%
#       as_tibble() %>%
#       gather()
#   }))%>%
#   unnest(popularity_by_day) %>%
#   rename(visitWeekday =key ,visits= value)
# 
# st_write(visitWeekday,"data/output/visitWeekday.GeoJSON")
visitWeekday <- st_read("data/output/visitWeekday.GeoJSON",crs=crs)
## Reading layer `visitWeekday' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitWeekday.GeoJSON' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 78715 features and 4 fields (with 1995 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84

Bias Analysis and Data Rectification

Sometimes, SafeGraph will count “Residents” or “passer-bys” of the position in “visits”, which will result in huge bias and mislead the analysis. So, to conclude typical position types of SafeGraph data bias, we choose midnight visitsand dwell time as factors for cluster analysis. We also apply principal component analysis here to assist in understanding characteristics of clusters.

Taking results of both cluster analysis and component analysis, most locations, 1094 out of 1117, belong to cluster 3 with reliable data. Cluster 2 includes 14 places with extremely more night visits and long visits that are more than 2 hours. Cluster 1 consists of 9 locations with many transitory visits that are less than 10 minutes.

# calculate midnight visit
visitHourNight <- visitHour %>% 
  filter(hour >= 1 | hour <=5) %>% 
  group_by(placekey) %>% 
  summarize(nightVisit = sum(visit))


# integrate sg data by midnight visit and dweel time
visitIntegrated <- full_join(visitHourNight %>% 
                               st_drop_geometry(), 
                             dwellTime %>% 
                               group_by(placekey, dwellTimes) %>% 
                               summarize(visitors = sum(visitors)),
                             by=c('placekey'))

# wide format
visitIntegrated <- visitIntegrated %>% 
  spread(key = dwellTimes, value = visitors)

# export
# st_write(visitIntegrated, "data/output/visitIntegrated.GeoJSON")
# import
visitIntegrated <- st_read("data/output/visitIntegrated.GeoJSON") %>% 
  rename(c("Dwell<5"="X.5","Dwell>240"="X.240","Dwell11-20"="X11.20","Dwell121-240"="X121.240","Dwell21-60"="X21.60","Dwell5-10"="X5.10","Dwell61-120"="X61.120" ))
## Reading layer `visitIntegrated' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitIntegrated.GeoJSON' 
##   using driver `GeoJSON'
## Simple feature collection with 1117 features and 9 fields (with 29 geometries empty)
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS:  WGS 84
visitIntegrated <- visitIntegrated[,c(1,2,3,8,5,7,9,6,4)]

# normalize
visitIntegrated.scale <- scale(visitIntegrated %>% 
                                 dplyr::select(-placekey) %>% 
                                 st_drop_geometry())

set.seed(1234)
# decide cluster number (only run once)

#nc <- NbClust(visitIntegrated.scale, min.nc=2, max.nc=15, method="kmeans", index="all")
#table(nc$Best.n[1,])
# 
# barplot(table(nc$Best.n[1,]),
#         xlab="Numer of Clusters", ylab="Number of Criteria",
#         main="Number of Clusters Chosen by 26 Criteria")


# Run K-Means cluster
cluster1 <- kmeans(visitIntegrated.scale, 3) 
summary(cluster1)
##              Length Class  Mode   
## cluster      1117   -none- numeric
## centers        24   -none- numeric
## totss           1   -none- numeric
## withinss        3   -none- numeric
## tot.withinss    1   -none- numeric
## betweenss       1   -none- numeric
## size            3   -none- numeric
## iter            1   -none- numeric
## ifault          1   -none- numeric
# add cluster number back
visitIntegrated <- visitIntegrated %>% 
  mutate(cluster = cluster1$cluster)

# mean by cluster
cluster1_mean <- aggregate(visitIntegrated %>% 
                             dplyr::select(-placekey, -cluster) %>% 
                             st_drop_geometry(),
                       by=list(cluster=visitIntegrated$cluster),
                       FUN=mean) %>% 
  left_join(visitIntegrated %>% 
              st_drop_geometry() %>% 
              group_by(cluster) %>% 
              summarize(size = n()),
            by="cluster")

kable(cluster1_mean,align = 'c',caption = '<center>Table 1. Mean values of clusters for SafeGraph data <center/>') %>%
  kable_classic(full_width = F)%>%
  kable_styling(position = "center")%>%
  scroll_box(width = "100%", height = "400px")
Table 1. Mean values of clusters for SafeGraph data
cluster nightVisit Dwell<5 Dwell5-10 Dwell11-20 Dwell21-60 Dwell61-120 Dwell121-240 Dwell>240 size
1 172970.00 1678.88889 15356.1111 9700.4444 16923.7778 11926.0000 8894.4444 5530.0000 9
2 2589580.14 340.57143 3161.0000 4029.0714 26152.4286 48977.9286 64342.5714 172938.8571 14
3 17164.71 50.56856 488.9762 337.9122 824.6335 659.4059 531.0649 926.6846 1094
## put histograms on the diagonal
panel.hist <- function(x, ...)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, ...)
}

#Color points by groups
my_cols <- palette4[1:3]

pairs(visitIntegrated %>% 
        st_drop_geometry() %>% 
        mutate(`Dwel<10`=`Dwell<5`+`Dwell5-10`,
               `Dwell11-120` = `Dwell11-20` + `Dwell21-60` + `Dwell61-120`,
               `Dwell>120` =  `Dwell121-240` + `Dwell>240`) %>% 
        dplyr::select(nightVisit, `Dwel<10`, `Dwell11-120`, `Dwell>120`), 
      pch = 19,  cex = 1, diag.panel = panel.hist,
      col = my_cols[visitIntegrated$cluster],
      lower.panel=NULL, panel = panel.smooth)
Figure. Correlation Matrix of SafeGraph Bias

Figure. Correlation Matrix of SafeGraph Bias

x <- clusplot(visitIntegrated.scale, 
         cluster1$cluster, color=TRUE, shade=TRUE, main = "",
         labels=5, lines=0, stand=TRUE, col.txt=palette5[1:3], col.clus=palette5[1:3], col.p=palette5[5])
Figure. Cluster Plot with Main Component

Figure. Cluster Plot with Main Component

# decide component number (2)
fa.parallel(as.data.frame(visitIntegrated.scale), fa = 'pc', n.iter = 100, show.legend = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2
# principal component analysis
set.seed(1234)
cluster1_pca <- principal(visitIntegrated.scale, nfactors = 2, rotate = "none", scores = TRUE)

options(digits = 2) 

kable(cluster1_pca$Structure[1:8,1:2],align = 'c',caption = '<center>Table 2. Correlations between variables and components <center/>') %>%
  kable_classic(full_width = F)%>%
  kable_styling(position = "center")%>%
  scroll_box(width = "100%", height = "400px")
Table 2. Correlations between variables and components
PC1 PC2
nightVisit 0.89 -0.44
Dwell<5 0.64 0.75
Dwell5-10 0.64 0.75
Dwell11-20 0.79 0.57
Dwell21-60 0.99 0.01
Dwell61-120 0.95 -0.29
Dwell121-240 0.92 -0.38
Dwell>240 0.88 -0.47

Based on the bias analysis above, we rectify SafeGraph data depending on its cluster / type. First, all visit counts with dwell time that is less than 5 minutes are removed. Also, for cluster 1, visit counts with dwell time that is between 5 and 10 minutes will be reduced referring to the related proportion in the normal cluster 3. On top of that, long visits that are longer than 2 hours in Cluster 2 will be trimmed into the appropriate ratio according to midnight visitors which can be treated as “residents”.

# cluster info
cluster <- visitIntegrated %>% 
              st_drop_geometry() %>%
              dplyr::select(placekey, cluster)

# caculate the ratio to trim "Dwell5-10" for cluster 1
Sum <- visitIntegrated %>% 
  st_drop_geometry() %>% 
  mutate(Total = `Dwell<5`+`Dwell5-10`+`Dwell11-20`+`Dwell21-60`+`Dwell61-120`+`Dwell121-240`+`Dwell>240`) %>% 
  group_by(cluster) %>% 
  summarize(`Dwell5-10` = sum(`Dwell5-10`),
            `Dwell<5` = sum(`Dwell<5`),
            Total = sum(Total))

ratio5To10 <- Sum[1,]$`Dwell5-10`/Sum[1,]$Total

group1.dwell <- dwellTime %>% 
  st_drop_geometry() %>% 
  spread(key=dwellTimes, value = visitors) %>% 
  left_join(visitIntegrated %>% dplyr::select(placekey, cluster) %>% st_drop_geometry(), by="placekey") %>% 
  filter(cluster==1) %>% 
  mutate(Total = `<5`+`5-10`+`11-20`+`21-60`+`61-120`+`121-240`+`>240`,
         `rectified5-10`=ifelse(Total*ratio5To10<=`5-10`,Total*ratio5To10,`5-10`),
         `rectified5-10rate`=`rectified5-10`/Total,
         `<5rate`=`<5`/Total)

group23.dwell <- dwellTime %>% 
  st_drop_geometry() %>% 
  spread(key=dwellTimes, value = visitors) %>% 
  mutate(Total = `<5`+`5-10`+`11-20`+`21-60`+`61-120`+`121-240`+`>240`,
         `<5rate`=`<5`/Total)

# caculate the ratio to trim "Dwell121-240" and "Dwell>240" for cluster 2
ratio2h <- visitHour %>% 
  st_drop_geometry() %>% 
  spread(key = hour, value = visit) %>% 
  left_join(visitIntegrated %>% 
              st_drop_geometry() %>%
              dplyr::select(placekey, cluster)) %>% 
  mutate(residents = (`1`+`2`+`3`+`4`+`5`)/5,
         hourlySum = (`1`+`2`+`3`+`4`+`5`+`6`+`7`+`8`+`9`+`10`+`11`+`12`+`13`+`14`+`15`+`16`+`17`+`18`+`19`+`20`+`21`+`22`+`23`+`24`),
         `ratio>2h` = 17*residents/hourlySum) %>% 
  dplyr::select(placekey, month, `ratio>2h`, cluster)
## Joining, by = "placekey"
# rectify 'visitHour'
visitHour <- visitHour %>% 
  left_join(ratio2h, by=c("placekey","month"))
  
visitHour <- visitHour %>% 
  filter(cluster==2) %>% 
  filter(hour<=7 | hour>=19) %>% 
  mutate(visit = visit*(1-`ratio>2h`)) %>%  
  rbind(
    visitHour %>% 
      filter(cluster!=2)
  ) %>% 
  rbind(
    visitHour %>% 
      filter(cluster==2) %>% 
      filter(hour>7 & hour<19)
  ) %>% 
  dplyr::select(-`ratio>2h`, -cluster)

# rectify 'dwellTime'
dwellTime <- dwellTime %>% 
  left_join(ratio2h, by=c("placekey","month"))

dwellTime <- dwellTime %>% 
  filter(cluster==2) %>% 
  filter(dwellTimes=="121-240" | dwellTimes==">240") %>% 
  mutate(visitors = visitors*(1-`ratio>2h`)) %>%  
  rbind(dwellTime %>% 
          filter(cluster==2) %>% 
          filter(dwellTimes!="121-240" & dwellTimes!=">240")) %>% 
  rbind(dwellTime %>% 
          filter(cluster==1) %>% 
          filter(dwellTimes=="5-10") %>% 
          left_join(group1.dwell %>% dplyr::select(placekey,month,`rectified5-10`),by=c("placekey","month")) %>% 
          mutate(visitors = `rectified5-10`) %>% 
          dplyr::select(-`rectified5-10`)) %>% 
  rbind(dwellTime %>% 
          filter(cluster==1) %>% 
          filter(dwellTimes!="5-10")) %>% 
  rbind(dwellTime %>% 
          filter(cluster==3)) %>% 
  dplyr::select(-`ratio>2h`, -cluster)

dwellTime <- dwellTime %>% 
  filter(dwellTimes!='<5')

# rectify 'visitCount'
visitCount <- visitCount %>% 
  left_join(ratio2h, by=c("placekey","month"))

visitCount <- visitCount %>% 
  filter(cluster==1) %>% 
  left_join(group1.dwell,by=c("placekey","month")) %>% 
  mutate(visits = visits*(1-`rectified5-10rate`-`<5rate`)) %>% 
  dplyr::select(placekey,visitDay,day,visits,month,geometry) %>% 
  rbind(visitCount %>% 
          filter(cluster==2)%>% 
          left_join(group23.dwell,by=c("placekey","month")) %>% 
          left_join(ratio2h, by=c("placekey","month")) %>% 
          mutate(visits = visits*(1-`<5rate`-`ratio>2h.y`)) %>% 
          dplyr::select(placekey,visitDay,day,visits,month,geometry)) %>% 
  rbind(visitCount %>% 
          filter(cluster==3) %>% 
          left_join(group23.dwell,by=c("placekey","month")) %>% 
          mutate(visits = visits*(1-`<5rate`)) %>% 
          dplyr::select(placekey,visitDay,day,visits,month,geometry)) %>% 
  mutate(visits = round(visits,0))

Overall SafeGraph Patterns

Device Visit Counts
sumVisit <- visitCount %>%
  dplyr::select(-visitDay,-day,-month) %>% 
  group_by(placekey) %>%
  summarise(visits=sum(visits))

ggplot(sumVisit)+
  geom_sf(data=pprServiceArea,color='black',size=0.25,fill= "transparent")+
  geom_sf(data=pprServiceArea %>% filter(PPR_DIST %in% c(7,8,9)),color='black',size=0.5,fill= "transparent")+
  geom_sf(aes(size = visits),color = palette1_main,fill = palette1_main,alpha = 0.3) +
  scale_size_continuous(range = c(1, 3),name = "Visits")+
  mapTheme()+
  theme(legend.position = "bottom",
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Map of total device visits

Figure. Map of total device visits

sumVisitServiceArea <- pprServiceArea %>%
  st_join(sumVisit,left =TRUE) %>% 
  drop_na(INFO) %>% 
  dplyr::select(INFO,visits,geometry) %>% 
  group_by(INFO) %>% 
  mutate(totalVisits=sum(visits)) %>% 
  dplyr::select(-visits) %>% 
  distinct() %>% 
  st_sf() %>% 
  st_transform(crs=crs)

ggplot(sumVisitServiceArea)+
  geom_sf(aes(fill = q5(totalVisits)),color = 'white',size=0.5) +
  scale_fill_manual(values = palette5,labels = qBr(sumVisitServiceArea,'totalVisits'),name = "Total Device Visits") +
  mapTheme()+
  theme(legend.position = "bottom",
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Map of total device visits

Figure. Map of total device visits

Origin
# CensusData <-
#   get_acs(geography = "block group",
#           variables = c("B01003_001E"),
#           year=2019, state="PA", county="Philadelphia", geometry=T, output="wide") %>%
#   st_transform(crs=4326) %>%
#   dplyr::select(-NAME,-starts_with('B')) %>%
#   st_centroid() %>%
#   as.data.frame() %>%
#   rename("originGeometry" = "geometry")
# 
# placekeyGeometry <-
#   originCount %>%
#   dplyr::select(placekey) %>%
#   group_by(placekey) %>% summarise()
# 
# orgCountPlot <- originCount %>% 
#   st_drop_geometry() %>% 
#   group_by(origin,placekey) %>%
#   summarise(visitors=sum(visitors))%>% 
#   left_join(placekeyGeometry,by=c("placekey" = "placekey")) %>% 
#   rename("parkGeometry" = "geometry")%>%
#   filter(startsWith(origin,"42101"))%>%
#   left_join(CensusData,by=c("origin" = "GEOID"))%>%
#   mutate(park.y=as.numeric(map(parkGeometry,function(x){return(x[2])})),
#          park.x=as.numeric(map(parkGeometry,function(x){return(x[1])})),
#          origin.y=as.numeric(map(originGeometry,function(x){return(x[2])})),
#          origin.x=as.numeric(map(originGeometry,function(x){return(x[1])})))
# 
# st_write(orgCountPlot,"data/output/orgCountPlot.GEOJSON")
orgCountPlot <- st_read("data/output/orgCountPlot.GEOJSON")
## Reading layer `orgCountPlot' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\orgCountPlot.GEOJSON' 
##   using driver `GeoJSON'
## Simple feature collection with 105487 features and 7 fields (with 1287 geometries empty)
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -75 ymin: 40 xmax: -75 ymax: 40
## Geodetic CRS:  WGS 84
orgCountPlot2 <- orgCountPlot%>%
  filter(visitors>200)

orgCountPlotHF <- orgCountPlot%>%
  filter(visitors>2000)


ggplot(data = orgCountPlot2) + 
  geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=0.5)+
  geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
  geom_curve(aes(x = origin.x, y = origin.y,xend = park.x,yend = park.y,color = q5(visitors)),
             size = 0.5,
             curvature = -0.2, 
             alpha = 0.4,)+
  scale_color_manual(values = palette5,
                     labels = qBr(orgCountPlot2,"visitors"),
                     name = "Device Visits (Quintile Breaks)") +
  labs(x="",y="")+
  mapTheme()+
  theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Flow map of parks and origins

Figure. Flow map of parks and origins

ggplot(data = orgCountPlotHF) + 
  geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
  geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=2)+
  geom_curve(aes(x = origin.x,y = origin.y,xend = park.x,yend = park.y),
             size = 1,color = palette1_main,curvature = -0.2, alpha = 0.4)+
  labs(x="",y="")+
  mapTheme()+
  theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Flow map of parks and origins - High Frequency

Figure. Flow map of parks and origins - High Frequency

Departure
# depaCountPlot <- departCount %>% 
#   st_drop_geometry() %>% 
#   group_by(departure,placekey) %>%
#   summarise(visitors=sum(visitors))%>% 
#   left_join(placekeyGeometry,by=c("placekey" = "placekey")) %>% 
#   rename("parkGeometry" = "geometry")%>%
#   filter(startsWith(departure,"42101"))%>%
#   left_join(CensusData,by=c("departure" = "GEOID"))%>%
#   mutate(park.y=as.numeric(map(parkGeometry,function(x){return(x[2])})),
#          park.x=as.numeric(map(parkGeometry,function(x){return(x[1])})),
#          departure.y=as.numeric(map(originGeometry,function(x){return(x[2])})),
#          departure.x=as.numeric(map(originGeometry,function(x){return(x[1])})))
# 
# st_write(depaCountPlot,"data/output/depaCountPlot.GEOJSON")
depaCountPlot <- st_read("data/output/depaCountPlot.GEOJSON")
## Reading layer `depaCountPlot' from data source 
##   `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\depaCountPlot.GEOJSON' 
##   using driver `GeoJSON'
## Simple feature collection with 98685 features and 7 fields (with 1219 geometries empty)
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -75 ymin: 40 xmax: -75 ymax: 40
## Geodetic CRS:  WGS 84
depaCountPlot2 <- depaCountPlot%>%
  filter(visitors>200)

depaCountPlotHF <- depaCountPlot%>%
  filter(visitors>2000)

ggplot(data = depaCountPlot2) + 
  geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=0.5)+
  geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
  geom_curve(aes(x = departure.x, y = departure.y, xend = park.x, yend = park.y,color = q5(visitors)),
             size = 0.5,curvature = -0.2, alpha = 0.4)+
  scale_color_manual(values = palette5,
                     labels = qBr(depaCountPlot2,"visitors"),
                     name = "Device Visits (Quintile Breaks)") +
  labs(x="",y="")+
  mapTheme()+
  theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Flow map of parks and departure points

Figure. Flow map of parks and departure points

ggplot(data = depaCountPlotHF) + 
  geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=0.5)+
  geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
  geom_curve(aes(x = departure.x, y = departure.y, xend = park.x, yend = park.y,color = q5(visitors)),
             size = 1,curvature = -0.2, alpha = 0.4)+
  scale_color_manual(values = palette5,
                     labels = qBr(depaCountPlot2,"visitors"),
                     name = "Device Visits (Quintile Breaks)") +
  labs(x="",y="")+
  mapTheme()+
  theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Flow map of parks and departure - High Frequency

Figure. Flow map of parks and departure - High Frequency

Weekday Pattern
Dwelling Time
dwellTimeForPlot <- dwellTime %>% 
  mutate(dwellTimes = recode(dwellTimes,
                             "<5" = 2.5,
                             "5-10" = 7.5,
                             "11-20" = 15,
                             "21-60" = 40,
                             "61-120" = 90,
                             "121-240" = 180,
                             ">240" = 0)) %>%
  mutate(sepTotalDwellTime = (visitors*dwellTimes)) %>% 
  group_by(placekey) %>% 
  mutate(totalVisitors=sum(visitors) )%>%
  filter(totalVisitors>50) %>% 
  mutate(avgDwellTime=sum(sepTotalDwellTime)/totalVisitors) %>% 
  dplyr::select(placekey,avgDwellTime) %>% 
  distinct()

ggplot(dwellTimeForPlot)+
  geom_sf(data=pprServiceArea,color='black',size=0.25,fill= "transparent")+
  geom_sf(data=pprDistrict,color='black',size=0.75,fill='transparent')+
  geom_sf(aes(size = avgDwellTime,color = avgDwellTime),alpha = 0.5) +
  scale_size_continuous(range = c(0,2),name = "avgDwellTime")+
  scale_color_continuous(low = '#FFDEDB',high ='#FF2903',
                     name = "avgDwellTime") +
  mapTheme()+
  theme(legend.position = "bottom",
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Map of dwell time

Figure. Map of dwell time

dwellTImePlotServiceArea <- pprServiceArea %>%
  st_join(dwellTimeForPlot,left =TRUE) %>% 
  drop_na(INFO) %>% 
  dplyr::select(INFO,avgDwellTime,geometry) %>% 
  group_by(INFO) %>% 
  mutate(avgDwellTime=mean(avgDwellTime)) %>% 
  distinct() %>% 
  st_sf() %>% 
  st_transform(crs=crs)

ggplot(dwellTImePlotServiceArea)+
  geom_sf(aes(fill = q5(avgDwellTime)),color = 'white',size=0.5) +
  scale_fill_manual(values = palette5,labels = qBr(dwellTImePlotServiceArea,'avgDwellTime'),name = "Average Device Dwelling Time") +
  mapTheme()+
  theme(legend.position = "bottom",
        legend.key.width = unit(0.5, 'cm'),
        legend.key.height  = unit(0.2, 'cm'))
Figure. Map of average dwelling time

Figure. Map of average dwelling time

Case Analysis

Conflicts between Mobility Data and Position Usage
# calculate mean dwell time
dwellMean <- dwellTime %>% 
  st_drop_geometry() %>% 
  mutate(dwell = case_when(dwellTimes=="5-10" ~ 7.5,
                           dwellTimes=="11-20" ~ 15.5,
                           dwellTimes=="21-60" ~ 40.5,
                           dwellTimes=="61-120" ~ 90.5,
                           dwellTimes=="121-240" ~ 180.5,
                           dwellTimes==">240" ~ 270),
         dwellMuti = dwell*visitors) %>% 
  group_by(placekey, month) %>% 
  summarize(dwellMuti = sum(dwellMuti),
            visitors = sum(visitors)) %>% 
  mutate(meanDwell = dwellMuti/visitors)
## `summarise()` has grouped output by 'placekey'. You can override using the `.groups` argument.
# integrate dwell, total counts and permit
sgIntegrated_byMonth <- full_join(dwellMean %>% dplyr::select(placekey,month,meanDwell), 
                          visitCount %>% 
                            st_drop_geometry() %>% 
                            group_by(placekey, month) %>% 
                            summarize(visits = sum(visits)),
                          by=c('placekey','month')) %>% 
  na.omit() %>% 
  ungroup() %>% 
  left_join(propertyWithPlaceKey %>% dplyr::select(placekey, OBJECTID)) %>% 
  left_join(permit2021.join %>% 
              group_by(PPR_Properties_ObjectID) %>% 
              summarize(permits = n()),
            by=c("OBJECTID"="PPR_Properties_ObjectID")) %>% 
  na.omit() %>% 
  st_sf() %>% 
  dplyr::select(-OBJECTID) %>% 
  group_by(placekey, month) %>% 
  summarize(permits = sum(permits),
            visits = mean(visits),
            meanDwell = mean(meanDwell)) %>% 
  ungroup()
## `summarise()` has grouped output by 'placekey'. You can override using the `.groups` argument.
## Joining, by = "placekey"
## `summarise()` has grouped output by 'placekey'. You can override using the `.groups` argument.
# normalize
sgIntegrated_byMonth.scale <- scale(sgIntegrated_byMonth %>% 
                                      dplyr::select(-placekey,-month) %>% 
                                      st_drop_geometry())

set.seed(1234)

# decide cluster number (only run once)

# nc <- NbClust(sgIntegrated_byMonth.scale, min.nc=2, max.nc=15, method="kmeans", index="all")
# table(nc$Best.n[1,])
# 
# barplot(table(nc$Best.n[1,]),
#         xlab="Numer of Clusters", ylab="Number of Criteria",
#         main="Number of Clusters Chosen by 26 Criteria")


# Run K-Means cluster
cluster2 <- kmeans(sgIntegrated_byMonth.scale, 4) 
summary(cluster2)
##              Length Class  Mode   
## cluster      948    -none- numeric
## centers       12    -none- numeric
## totss          1    -none- numeric
## withinss       4    -none- numeric
## tot.withinss   1    -none- numeric
## betweenss      1    -none- numeric
## size           4    -none- numeric
## iter           1    -none- numeric
## ifault         1    -none- numeric
# add cluster number back
sgIntegrated_byMonth <- sgIntegrated_byMonth %>% 
  mutate(cluster = cluster2$cluster)

# mean by cluster
cluster2_mean <- aggregate(sgIntegrated_byMonth %>% 
                             dplyr::select(-placekey, -cluster,-month) %>% 
                             st_drop_geometry(),
                       by=list(cluster=sgIntegrated_byMonth$cluster),
                       FUN=mean) %>% 
  left_join(sgIntegrated_byMonth %>% 
              st_drop_geometry() %>% 
              group_by(cluster) %>% 
              summarize(size = n()),
            by="cluster")

kable(cluster2_mean,align = 'c',caption = '<center>Table 3. Mean values of clusters for conflicts <center/>') %>%
  kable_classic(full_width = F)%>%
  kable_styling(position = "center")%>%
  scroll_box(width = "100%", height = "400px")
Table 3. Mean values of clusters for conflicts
cluster permits visits meanDwell size
1 12 471 68 414
2 48 587 77 187
3 12 411 110 289
4 12 9855 191 58
## put histograms on the diagonal
panel.hist <- function(x, ...)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, ...)
}

#Color points by groups
my_cols <- palette4[1:4]

pairs(sgIntegrated_byMonth %>% 
        st_drop_geometry() %>% 
        dplyr::select(meanDwell,visits,permits), 
      pch = 19,  cex = 1, diag.panel = panel.hist,
      col = my_cols[sgIntegrated_byMonth$cluster],
      lower.panel=NULL, panel = panel.smooth)
Figure. Correlation Matrix of Conflicts between Mobility Data $ Activities

Figure. Correlation Matrix of Conflicts between Mobility Data $ Activities

sgIntegrated_byMonth %>%
  st_drop_geometry() %>% 
  filter(cluster == 2) %>% 
  group_by(placekey) %>% 
  summarize(howManyMonthBelongToThisCluster=n()) %>% 
  kable(.,align = 'c') %>%
  kable_classic(full_width = F)%>%
  kable_styling(position = "center")%>%
  scroll_box(width = "100%", height = "400px")
sgIntegrated_byMonth %>%
  st_drop_geometry() %>% 
  filter(cluster == 4) %>% 
  group_by(placekey) %>% 
  summarize(size=n()) %>% 
  kable(.,align = 'c') %>%
  kable_classic(full_width = F)%>%
  kable_styling(position = "center")%>%
  scroll_box(width = "100%", height = "400px")

cluster1 & 3 : Comparatively normal positions. The dwell pattern is different for these two clusters.

cluster 2 (choose one place with a howManyMonthBelongToThisCluster of 11, there are many of them): normal performance on sg data (dwell and visits), but with many permit plans

cluster4 ‘zzz-222@63s-dvq-5fz’ ‘zzz-222@628-pm9-jn5’ ‘zzw-222@628-pg9-tgk’ (choose one or more): perform well in sg data (dwell and visits), but not more permit plans

Specific Cases1 (templete)

These should come from the outcome of clustering Analysis

visitCount789 <- 
  st_join(visitCount %>% st_transform(crs=4326),pprServiceArea%>% st_transform(crs=4326),left=TRUE) %>% 
  filter(PPR_DIST %in% c(7,8,9))

visitCount789 <- visitCount789 %>% 
  dplyr::select(placekey,visits) %>% 
  st_drop_geometry() %>% 
  group_by(placekey) %>% 
  mutate(totalVisits = sum(visits)) %>% 
  dplyr::select(-visits) %>% 
  distinct()
Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

sumVisit <- dwellTime %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  dplyr::select(-month) %>% 
  group_by(dwellTimes) %>%
  summarise(visitors=sum(visitors)) %>% 
  st_drop_geometry() 

sumVisit$dwellTimes <- factor(sumVisit$dwellTimes, levels= c("<5","5-10","11-20","21-60","61-120","121-240",">240" ))

sumVisit%>%
  ggplot(aes(dwellTimes,visitors)) +
  geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
  labs(x="Dwell Time", y="Aggregated Visitors",
       title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
  plotTheme(10,20)
Figure. Map of dwell time

Figure. Map of dwell time

visitCount %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  st_drop_geometry()%>%
  na.omit()%>%
  ggplot(aes(visitDay,visits)) + 
  geom_line(color=palette1_main,size=1)+
  labs(title = 'Christy Recreation Center (zzz-222@63s-dvq-5fz)',x="Visit Date",y="Safegraph Visit")+
  plotTheme(10,20)+
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1))
Figure. Map of dwell time

Figure. Map of dwell time

sumVisit <- visitHour %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  dplyr::select(-month) %>% 
  group_by(hour) %>%
  summarise(visits=sum(visit)) %>% 
  st_drop_geometry()

sumVisit%>%
  ggplot(aes(hour,visits)) +
  geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
  labs(x="hour", y="Aggregated Visits",
       title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
  plotTheme(10,20)
Figure. Map of visit time

Figure. Map of visit time

And its reasons analysis

Specific Cases2 (templete)

These should come from the outcome of clustering Analysis

visitCount789 <- 
  st_join(visitCount %>% st_transform(crs=4326),pprServiceArea%>% st_transform(crs=4326),left=TRUE) %>% 
  filter(PPR_DIST %in% c(7,8,9))

visitCount789 <- visitCount789 %>% 
  dplyr::select(placekey,visits) %>% 
  st_drop_geometry() %>% 
  group_by(placekey) %>% 
  mutate(totalVisits = sum(visits)) %>% 
  dplyr::select(-visits) %>% 
  distinct()
Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

sumVisit <- dwellTime %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  dplyr::select(-month) %>% 
  group_by(dwellTimes) %>%
  summarise(visitors=sum(visitors)) %>% 
  st_drop_geometry() 

sumVisit$dwellTimes <- factor(sumVisit$dwellTimes, levels= c("<5","5-10","11-20","21-60","61-120","121-240",">240" ))

sumVisit%>%
  ggplot(aes(dwellTimes,visitors)) +
  geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
  labs(x="Dwell Time", y="Aggregated Visitors",
       title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
  plotTheme(10,20)
Figure. Map of dwell time

Figure. Map of dwell time

visitCount %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  st_drop_geometry()%>%
  na.omit()%>%
  ggplot(aes(visitDay,visits)) + 
  geom_line(color=palette1_main,size=1)+
  labs(title = 'Christy Recreation Center (zzz-222@63s-dvq-5fz)',x="Visit Date",y="Safegraph Visit")+
  plotTheme(10,20)+
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1))
Figure. Map of dwell time

Figure. Map of dwell time

sumVisit <- visitHour %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  dplyr::select(-month) %>% 
  group_by(hour) %>%
  summarise(visits=sum(visit)) %>% 
  st_drop_geometry()

sumVisit%>%
  ggplot(aes(hour,visits)) +
  geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
  labs(x="hour", y="Aggregated Visits",
       title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
  plotTheme(10,20)
Figure. Map of visit time

Figure. Map of visit time

And its reasons analysis

Specific Cases3 (templete)

These should come from the outcome of clustering Analysis

visitCount789 <- 
  st_join(visitCount %>% st_transform(crs=4326),pprServiceArea%>% st_transform(crs=4326),left=TRUE) %>% 
  filter(PPR_DIST %in% c(7,8,9))

visitCount789 <- visitCount789 %>% 
  dplyr::select(placekey,visits) %>% 
  st_drop_geometry() %>% 
  group_by(placekey) %>% 
  mutate(totalVisits = sum(visits)) %>% 
  dplyr::select(-visits) %>% 
  distinct()
Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

Figure Matthias Baldwin Park

sumVisit <- dwellTime %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  dplyr::select(-month) %>% 
  group_by(dwellTimes) %>%
  summarise(visitors=sum(visitors)) %>% 
  st_drop_geometry() 

sumVisit$dwellTimes <- factor(sumVisit$dwellTimes, levels= c("<5","5-10","11-20","21-60","61-120","121-240",">240" ))

sumVisit%>%
  ggplot(aes(dwellTimes,visitors)) +
  geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
  labs(x="Dwell Time", y="Aggregated Visitors",
       title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
  plotTheme(10,20)
Figure. Map of dwell time

Figure. Map of dwell time

visitCount %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  st_drop_geometry()%>%
  na.omit()%>%
  ggplot(aes(visitDay,visits)) + 
  geom_line(color=palette1_main,size=1)+
  labs(title = 'Christy Recreation Center (zzz-222@63s-dvq-5fz)',x="Visit Date",y="Safegraph Visit")+
  plotTheme(10,20)+
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1))
Figure. Map of dwell time

Figure. Map of dwell time

sumVisit <- visitHour %>% 
  filter(placekey=='zzz-222@63s-dvq-5fz') %>% 
  dplyr::select(-month) %>% 
  group_by(hour) %>%
  summarise(visits=sum(visit)) %>% 
  st_drop_geometry()

sumVisit%>%
  ggplot(aes(hour,visits)) +
  geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
  labs(x="hour", y="Aggregated Visits",
       title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
  plotTheme(10,20)
Figure. Map of visit time

Figure. Map of visit time

And its reasons analysis

Next Stage Plan

Use Case

Jeff Dashboard